/* calculate ravenpack skewness measure */
* add to the dataset industry_skew_major;
libname edu '!userprofile\\Dropbox\Education\Replication\Data'; 

/* ess: 50 is the neutral score, from the ravenpack manual, p.19-21, already scaled it to -1 to 1 */
data raven;
  set edu.raven;
run;

* only keep one story for each day (Dang et al, JFE 2015 use daily ESS value), we keep the one with the highest ens;
proc sort data = raven;
  by permno date descending ens; 
run;

data raven;
  set raven;
  by permno date;
  if first.date;
run;

/* get mktcap and permco */
proc sql;
  create table permco as
  select distinct permco, permno, year(date) as year
  from edu.crsp_data;
quit;

proc sql;
  create table raven as
  select a.*, b.permco
  from raven as a left join permco as b
  on a.permno = b.permno and a.year = b.year;
quit;

proc sql;
  create table raven as
  select a.*, abs(b.prc)*b.shrout as mktcap
  from raven as a left join edu.crsp_data as b
  on a.permno = b.permno and a.year = year(b.date) and month(b.date) = 12;
quit;

/* for each permco, only the permno that has the largest market cap will be counted (get rid of multiple share classes) */
proc sort data = raven;
  by permco year descending mktcap;
run;

data raven2;
  set raven;
  by permco year;
  if first.year;
run;

proc sql;
  create table raven as 
  select a.*
  from raven as a, raven2 as b
  where a.permno = b.permno and a.year = b.year;
quit;

/* calculate firm-year-level total scores */
proc sort data = raven; by permno year; run;

proc means data = raven noprint;
  by permno year;
  var ess;
  output out = raven2
  sum = esssum;
run;

proc sql;
  create table raven2 as
  select a.*, abs(b.prc)*b.shrout as lagmktcap
  from raven2 as a left join edu.crsp_data as b
  on a.permno = b.permno and a.year - year(b.date) = 1 and month(b.date) = 12;
quit;

/* get # employees, if there are duplicate entries, choose the larger and the most recent */
data emp (keep = lpermno datadate year emp); set edu.crspcompustat_data; year = year(datadate); if emp = . then delete; if 2000 <= year <= 2015; run; 

proc sort data = emp; by lpermno year descending datadate descending emp; run;
proc sort data = emp nodupkey; by lpermno year; run;

proc sql;
  create table raven2 as
  select distinct a.lpermno as permno, a.emp, a.year, b.*
  from emp as a left join raven2 as b
  on a.lpermno = b.permno and a.year = b.year;
quit;

/* map SIC2 and NAICS3 into major code */
proc sql;
  create table raven3 as
  select distinct a.*, floor(b.siccd/100) as sic_2, compress(substr(b.naics,1,3)) as naics_3
  from raven2 as a left join edu.crsp_data as b
  on a.permno = b.permno and a.year = year(b.date) and month(b.date) = 12;
quit;

proc sql;
  create table temp as
  select a.*, b.major 
  from raven3 as a left join edu.sic_2_major as b
  on a.sic_2 = b.sic_2;
quit;

data temp;
  set temp;
  if year >= 2005 then delete;
run;

data temp2; set raven3; naics3 = input(naics_3, 12.); run;

proc sql;
  create table temp2 as
  select a.*, b.major 
  from temp2 as a left join edu.naics_3_major as b
  on a.naics3 = b.naics_3;
quit;

data temp2 (drop = naics3);
  set temp2;
  if year >= 2005;
run;

data raven2_major; set temp; run;
proc append base = raven2_major data = temp2; run;

data raven2_major;
  set raven2_major;
  if not missing (major);
run;

proc sort data = raven2_major;
by year major;
run; 

proc univariate data = raven2_major vardef = df noprint;
by year major;
var esssum;
weight emp;
output out=skewness_2e_major skew=ravenskew1e;
run;

proc sql;
  create table edu.industry_skew_major as
  select a.*, b.ravenskew1e
  from edu.industry_skew_major as a left join skewness_2e_major as b
  on a.year = b.year and a.major = b.major;
quit;


